import json
import gzip
from datetime import datetime
from itemadapter import ItemAdapter


class JsonWriterPipeline:
    """Item pipeline that writes every item as one JSON line to a gzip file.

    The output file is named ``<spider.target_user>_tweets.jsonl.gz``. It is
    opened in text-write mode (so an existing file with the same name is
    overwritten) when the spider opens, and closed when the spider closes.
    """

    # Layout of the incoming ``created_at`` string,
    # e.g. "Wed Oct 10 20:19:24 +0000 2018".
    CREATED_AT_FORMAT = '%a %b %d %H:%M:%S %z %Y'

    # Output handle; None whenever no file is open, so close_spider is safe
    # to call even if open_spider never ran or failed.
    file = None

    def open_spider(self, spider):
        """Open the gzip-compressed JSON Lines output file for *spider*."""
        self.file = gzip.open(f'{spider.target_user}_tweets.jsonl.gz', 'wt', encoding='utf-8')

    def close_spider(self, spider):
        """Close the output file if one is open; a no-op otherwise."""
        if self.file is not None:
            self.file.close()
            self.file = None

    @classmethod
    def _to_iso(cls, value):
        """Return *value* as an ISO 8601 string when it can be converted.

        ``datetime`` objects and strings matching ``CREATED_AT_FORMAT`` are
        converted. Anything else (``None``, an empty string, a string in
        another layout such as an already-converted ISO timestamp) is
        returned unchanged, so one odd timestamp does not cost the whole item.
        """
        if isinstance(value, datetime):
            return value.isoformat()
        if not isinstance(value, str) or not value:
            return value
        try:
            return datetime.strptime(value, cls.CREATED_AT_FORMAT).isoformat()
        except ValueError:
            # Not in the expected layout: keep the original text verbatim.
            return value

    def process_item(self, item, spider):
        """Normalize ``created_at``, append the item as a JSON line, return it.

        The item is modified in place: when ``created_at`` is present it is
        replaced by the result of ``_to_iso``, so later pipelines see the
        normalized value as well.
        """
        adapter = ItemAdapter(item)

        # Convert the timestamp format.
        if 'created_at' in adapter:
            adapter['created_at'] = self._to_iso(adapter['created_at'])

        line = json.dumps(adapter.asdict(), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item